from config.Config import *
from utils.GetUtils import *  # 自定义封装的；类
from data.Data import Data  # 自定义封装的类
import time  # 睡眠
from utils.YygeUtils import *
import openpyxl

# driver = webdriver.Edge()
overallData = []  # Global accumulator for scraped data (printed after each crawl pass)

# Mock keyword list kept for convenient manual testing
# keywords = ["生态保护成效评估"]

# Build the configuration object that drives the whole crawl
config = Config()

# Share the config with the GetUtils helper (class-level attribute)
GetUtils.config = config
# Login phase
driver = YygeUtils.getWebView()  # Obtain the browser driver instance
# When login is enabled in the config, authenticate before crawling
if config.login.switch:
    GetUtils.ToUrl({"driver": driver, "num": 0}, config.login.url)
    # Execute the JS file that contains the login logic
    driver.execute_script(YygeUtils.getText(config.login.js))
    if config.login.sleep != 0:
        # Give the login flow time to complete (configurable delay, in seconds)
        time.sleep(config.login.sleep)

# Keywords to iterate over during the crawl
print("搜索关键字:", config.search.testWords)

currentWords = config.search.testWords


def crawlerFlow(driver, config, name, index):
    """Run one crawl pass for a single keyword/page combination.

    Navigates to the search URL built from the config template (filling in
    whichever of ``{keyword}`` / ``{page}`` placeholders it contains), waits
    for the page via init.js, prepares the per-pass output file, and hands
    control to ``GetUtils.tryCatch`` for the actual scraping.

    Args:
        driver: Shared selenium WebDriver instance.
        config: Config object holding search/output settings.
        name:   Search keyword, or None/empty when crawling by page number only.
        index:  Current page number.

    Raises:
        Exception: when init.js cannot find the element selected by
            ``config.search.initSelector``.
    """
    currentData = Data()  # Fresh Data instance for this pass
    dataList = []
    data = {}
    filePrefix = name if name else "page"  # Fall back to "page" when no keyword
    currentData.name = f"{filePrefix}_{index}"

    # Wrapper around the driver; num counts requests — after three, the UA is rotated.
    driverData = {"driver": driver, "num": 0}
    # Default to the plain wrapper so the scrape step below always has a
    # valid object, even when search.switch is off or the URL template
    # contains no placeholders (previously this could be unbound/None).
    newDriverData = driverData
    if config.search.switch:
        configUrl = config.search.url
        hasKeyword = "{keyword}" in configUrl
        hasPage = "{page}" in configUrl
        # Fill in whichever placeholders the template uses, then navigate.
        if hasKeyword and hasPage:
            newDriverData = GetUtils.ToUrl(driverData, configUrl.format(page=index, keyword=name))
        elif hasKeyword:
            newDriverData = GetUtils.ToUrl(driverData, configUrl.format(keyword=name))
        elif hasPage:
            newDriverData = GetUtils.ToUrl(driverData, configUrl.format(page=index))

        # Pages load lazily: init.js polls until config.search.initSelector
        # appears, so subsequent steps see a fully rendered DOM.
        print("initSelector=", config.search.initSelector)
        # Raw string: same path value as before, without relying on the
        # invalid escape sequences \j and \i (deprecated in Python).
        initJs = YygeUtils.getText(r"\js\init.js").replace("|?|", config.search.initSelector)
        try:
            newDriverData["driver"].execute_script(initJs)
        except Exception as e:
            errorInfo_en = "init.js as Error！target Element not find ！#Element Selector:" + config.search.initSelector
            errorInfo_ch = "异常: 初始化Js报错(init.js)!目标元素未找到 ！#元素选择器:" + config.search.initSelector
            raise Exception(errorInfo_en + "\n" + errorInfo_ch) from e

    dataFilePath = config.output.dataPath.format(keyword=f"{filePrefix}_{index}")
    # Resolve against the working directory to check for an existing dump.
    dataAllPath = os.getcwd() + dataFilePath
    if os.path.exists(dataAllPath):
        # Reuse previously scraped entries when the file has content.
        jsonText = YygeUtils.getText(dataFilePath)
        dataList = json.loads(jsonText) if jsonText.strip() else []
    else:
        # Create an empty output file so later writes succeed.
        YygeUtils.writeText(dataFilePath, "")
        dataList = []

    # ★★★ async wrapper ★★★ — performs the actual scraping for this pass.
    GetUtils.tryCatch(newDriverData, f"{filePrefix}_{index}", currentData, dataList, data)
    print(overallData)

# No keywords configured: fall back to crawling purely by page number.
if not currentWords:
    pageSpan = config.search.page
    for pageNo in range(pageSpan[0], pageSpan[1]):
        crawlerFlow(driver, config, None, pageNo)


# Otherwise run one crawl per keyword across the configured page span.
for keyword in currentWords:
    pageSpan = config.search.page

    # Proceed only when the start and end page numbers differ.
    if pageSpan[0] != pageSpan[1]:
        for pageNo in range(pageSpan[0], pageSpan[1]):
            crawlerFlow(driver, config, keyword, pageNo)
    print(overallData)
